import requests
import json
import math
from liebiaowang import get_company_url_from_page_url

# Base URL of the REST API; every endpoint URL in this file is built from it.
host_url = "http://127.0.0.1:8008"
# "pici" (batch) value used to select city pages that still have to be processed.
TODO_PICI = 0
# "pici" (batch) value written back to a city page once it has been processed.
DONE_PICI = 1
def get_dance_company_url():
    """
    Collect the training-class URLs of every pending city listing page.

    Workflow:
      01. Snapshot all city pages whose ``pici`` equals ``TODO_PICI``.
      02. For each of them, extract the detail-page (company) URLs.
      03. Store every extracted URL through ``create_company_url``.
      04. Mark the city page as done (``pici = DONE_PICI``).

    The pending pages are fetched completely *before* any of them is marked
    as done. Marking rows while still paging through the filtered listing
    shrinks that listing, so paging by number would silently skip rows.

    :raises requests.HTTPError: when the listing endpoint answers with an
        error status (other than a 404 for a page past the end).
    """
    headers = {'cache-control': "no-cache"}
    page_size = 100

    # 01 Snapshot the pending city pages from the *filtered* listing.
    pending_city_pages = []
    page_num = 1
    while True:
        request_url = "{}/city_pages/?page_size={}&page={}&pici={}".format(
            host_url, page_size, page_num, TODO_PICI)
        response = requests.request("GET", request_url, headers=headers)
        if response.status_code == 404 and page_num > 1:
            # NOTE(review): assumes the API answers 404 for a page number past
            # the last one (as DRF-style paginators do) - treat as end of data.
            break
        response.raise_for_status()

        body = json.loads(response.text)
        results = body.get('results') or []
        if not results:
            break
        pending_city_pages.extend(results)

        # Stop early when the API reports a total and it has been reached.
        total_count = body.get('count')
        if total_count is not None and len(pending_city_pages) >= total_count:
            break
        page_num += 1

    # 02 Walk the snapshot; the listing is no longer queried from here on.
    for city_page in pending_city_pages:
        # 03 Extract the list of detail-page URLs from the city page.
        company_url_list = get_company_url_from_page_url(page_url=city_page['page_url'])
        # 04 Store each detail-page URL in the database via the API.
        for company_url in company_url_list:
            create_company_url(company_data={
                "city_page": city_page['id'],
                "company_url": company_url,
                "state": 1,
            })
        # 05 Only after all URLs were submitted, flag the page as done.
        update_city_page_pici(city_page_data={"id": city_page['id'], "pici": DONE_PICI})

def create_company_url(company_data):
    """
    POST one training-class URL record to the ``/dance_companys/`` endpoint.

    The JSON text that was sent and the raw response body are printed.

    :param company_data: dict to submit, e.g.
        {"city_page": 2, "company_url": "http://baidu.com", "state": 1}
    """
    endpoint = "{}/dance_companys/".format(host_url)
    # Serialise the dict to JSON text; this text is both sent and printed.
    payload = json.dumps(company_data)
    request_headers = {'Content-Type': "application/json", 'cache-control': "no-cache"}

    reply = requests.request("POST", endpoint, data=payload, headers=request_headers)
    print("company_data: ", payload)
    print("培训班url数据已插入: ", reply.text)


def update_city_page_pici(city_page_data):
    """
    PUT a new batch (``pici``) value to ``/city_pages/<id>/``.

    The JSON text that was sent and the raw response body are printed.

    :param city_page_data: dict holding the page id and the new batch value,
        e.g. {"id": 1, "pici": 1}
    """
    page_id = city_page_data['id']
    endpoint = "{}/city_pages/{}/".format(host_url, page_id)
    # Serialise the dict to JSON text; this text is both sent and printed.
    payload = json.dumps(city_page_data)
    request_headers = {'Content-Type': "application/json", 'cache-control': "no-cache"}

    reply = requests.request("PUT", endpoint, data=payload, headers=request_headers)
    print("city_page_data: ", payload)
    print("分页数据批次已更新: ", reply.text)

# Run the full collection job when this file is executed as a script.
if __name__ == '__main__':
    get_dance_company_url()


